Inflater xref

View Javadoc

1   // Inflater.java, created Mon Jul  8  4:06:18 2002 by joewhaley
2   // Copyright (C) 2001-3 John Whaley <jwhaley@alum.mit.edu>
3   // Licensed under the terms of the GNU LGPL; see COPYING for details.
4   
5   package joeq.ClassLib.Common.java.util.zip;
6   
7   import java.util.zip.DataFormatException;
8   
9   /***
10   * Inflater
11   *
12   * @author  John Whaley <jwhaley@alum.mit.edu>
13   * @version $Id: Inflater.java 1712 2004-04-28 17:36:13Z joewhaley $
14   */
15  public class Inflater {
16      
17      private static void initIDs() {
18      }
19  
20      /* Copy lengths for literal codes 257..285 */
21      private static final int CPLENS[] =
22          {
23              3,
24              4,
25              5,
26              6,
27              7,
28              8,
29              9,
30              10,
31              11,
32              13,
33              15,
34              17,
35              19,
36              23,
37              27,
38              31,
39              35,
40              43,
41              51,
42              59,
43              67,
44              83,
45              99,
46              115,
47              131,
48              163,
49              195,
50              227,
51              258 };
52  
53      /* Extra bits for literal codes 257..285 */
54      private static final int CPLEXT[] =
55          {
56              0,
57              0,
58              0,
59              0,
60              0,
61              0,
62              0,
63              0,
64              1,
65              1,
66              1,
67              1,
68              2,
69              2,
70              2,
71              2,
72              3,
73              3,
74              3,
75              3,
76              4,
77              4,
78              4,
79              4,
80              5,
81              5,
82              5,
83              5,
84              0 };
85  
86      /* Copy offsets for distance codes 0..29 */
87      private static final int CPDIST[] =
88          {
89              1,
90              2,
91              3,
92              4,
93              5,
94              7,
95              9,
96              13,
97              17,
98              25,
99              33,
100             49,
101             65,
102             97,
103             129,
104             193,
105             257,
106             385,
107             513,
108             769,
109             1025,
110             1537,
111             2049,
112             3073,
113             4097,
114             6145,
115             8193,
116             12289,
117             16385,
118             24577 };
119 
120     /* Extra bits for distance codes */
121     private static final int CPDEXT[] =
122         {
123             0,
124             0,
125             0,
126             0,
127             1,
128             1,
129             2,
130             2,
131             3,
132             3,
133             4,
134             4,
135             5,
136             5,
137             6,
138             6,
139             7,
140             7,
141             8,
142             8,
143             9,
144             9,
145             10,
146             10,
147             11,
148             11,
149             12,
150             12,
151             13,
152             13 };
153 
154     /* This are the state in which the inflater can be.  */
155     private static final int DECODE_HEADER = 0;
156     private static final int DECODE_DICT = 1;
157     private static final int DECODE_BLOCKS = 2;
158     private static final int DECODE_STORED_LEN1 = 3;
159     private static final int DECODE_STORED_LEN2 = 4;
160     private static final int DECODE_STORED = 5;
161     private static final int DECODE_DYN_HEADER = 6;
162     private static final int DECODE_HUFFMAN = 7;
163     private static final int DECODE_HUFFMAN_LENBITS = 8;
164     private static final int DECODE_HUFFMAN_DIST = 9;
165     private static final int DECODE_HUFFMAN_DISTBITS = 10;
166     private static final int DECODE_CHKSUM = 11;
167     private static final int FINISHED = 12;
168 
169     /*** This variable contains the current state. */
170     private int mode;
171 
172     /***
173      * The adler checksum of the dictionary or of the decompressed
174      * stream, as it is written in the header resp. footer of the
175      * compressed stream.  <br>
176      *
177      * Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
178      */
179     private int readAdler;
180     /*** 
181      * The number of bits needed to complete the current state.  This
182      * is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
183      * DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.  
184      */
185     private int neededBits;
186     private int repLength, repDist;
187     private int uncomprLen;
188     /***
189      * True, if the last block flag was set in the last block of the
190      * inflated stream.  This means that the stream ends after the
191      * current block.  
192      */
193     private boolean isLastBlock;
194 
195     /***
196      * The total number of inflated bytes.
197      */
198     private int totalOut;
199     /***
200      * The total number of bytes set with setInput().  This is not the
201      * value returned by getTotalIn(), since this also includes the 
202      * unprocessed input.
203      */
204     private int totalIn;
205     /***
206      * This variable stores the nowrap flag that was given to the constructor.
207      * True means, that the inflated stream doesn't contain a header nor the
208      * checksum in the footer.
209      */
210     private boolean nowrap;
211 
212     private StreamManipulator input;
213     private OutputWindow outputWindow;
214     private InflaterDynHeader dynHeader;
215     private InflaterHuffmanTree litlenTree, distTree;
216     private Adler32 adler;
217 
218     /***
219      * Creates a new inflater.
220      */
221     public Inflater() {
222         this(false);
223     }
224 
225     /***
226      * Creates a new inflater.
227      * @param nowrap true if no header and checksum field appears in the
228      * stream.  This is used for GZIPed input.  For compatibility with
229      * Sun JDK you should provide one byte of input more than needed in
230      * this case.
231      */
232     public Inflater(boolean nowrap) {
233         this.__init__(nowrap);
234     }
235 
236     public void __init__(boolean nowrap) {
237         this.nowrap = nowrap;
238         this.adler = new Adler32();
239         input = new StreamManipulator();
240         outputWindow = new OutputWindow();
241         mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
242     }
243 
244     /***
245      * Resets the inflater so that a new stream can be decompressed.  All
246      * pending input and output will be discarded.
247      */
248     public void reset() {
249         mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
250         totalIn = totalOut = 0;
251         input.reset();
252         outputWindow.reset();
253         dynHeader = null;
254         litlenTree = null;
255         distTree = null;
256         isLastBlock = false;
257         adler.reset();
258     }
259 
260     /***
261      * Decodes the deflate header.
262      * @return false if more input is needed. 
263      * @exception DataFormatException if header is invalid.
264      */
265     private boolean decodeHeader() throws DataFormatException {
266         int header = input.peekBits(16);
267         if (header < 0)
268             return false;
269         input.dropBits(16);
270 
271         /* The header is written in "wrong" byte order */
272         header = ((header << 8) | (header >> 8)) & 0xffff;
273         if (header % 31 != 0)
274             throw new DataFormatException("Header checksum illegal");
275 
276         if ((header & 0x0f00) != (Deflater.DEFLATED << 8))
277             throw new DataFormatException("Compression Method unknown");
278 
279         /* Maximum size of the backwards window in bits. 
280          * We currently ignore this, but we could use it to make the
281          * inflater window more space efficient. On the other hand the
282          * full window (15 bits) is needed most times, anyway.
283          int max_wbits = ((header & 0x7000) >> 12) + 8;
284          */
285 
286         if ((header & 0x0020) == 0) // Dictionary flag?
287             {
288             mode = DECODE_BLOCKS;
289         } else {
290             mode = DECODE_DICT;
291             neededBits = 32;
292         }
293         return true;
294     }
295 
296     /***
297      * Decodes the dictionary checksum after the deflate header.
298      * @return false if more input is needed. 
299      */
300     private boolean decodeDict() {
301         while (neededBits > 0) {
302             int dictByte = input.peekBits(8);
303             if (dictByte < 0)
304                 return false;
305             input.dropBits(8);
306             readAdler = (readAdler << 8) | dictByte;
307             neededBits -= 8;
308         }
309         return false;
310     }
311 
312     /***
313      * Decodes the huffman encoded symbols in the input stream.
314      * @return false if more input is needed, true if output window is
315      * full or the current block ends.
316      * @exception DataFormatException if deflated stream is invalid.  
317      */
318     private boolean decodeHuffman() throws DataFormatException {
319         int free = outputWindow.getFreeSpace();
320         while (free >= 258) {
321             int symbol;
322             switch (mode) {
323                 case DECODE_HUFFMAN :
324                     /* This is the inner loop so it is optimized a bit */
325                     while (((symbol = litlenTree.getSymbol(input)) & ~0xff)
326                         == 0) {
327                         outputWindow.write(symbol);
328                         if (--free < 258)
329                             return true;
330                     }
331                     if (symbol < 257) {
332                         if (symbol < 0)
333                             return false;
334                         else {
335                             /* symbol == 256: end of block */
336                             distTree = null;
337                             litlenTree = null;
338                             mode = DECODE_BLOCKS;
339                             return true;
340                         }
341                     }
342 
343                     try {
344                         repLength = CPLENS[symbol - 257];
345                         neededBits = CPLEXT[symbol - 257];
346                     } catch (ArrayIndexOutOfBoundsException ex) {
347                         throw new DataFormatException("Illegal rep length code");
348                     }
349                     /* fall through */
350                 case DECODE_HUFFMAN_LENBITS :
351                     if (neededBits > 0) {
352                         mode = DECODE_HUFFMAN_LENBITS;
353                         int i = input.peekBits(neededBits);
354                         if (i < 0)
355                             return false;
356                         input.dropBits(neededBits);
357                         repLength += i;
358                     }
359                     mode = DECODE_HUFFMAN_DIST;
360                     /* fall through */
361                 case DECODE_HUFFMAN_DIST :
362                     symbol = distTree.getSymbol(input);
363                     if (symbol < 0)
364                         return false;
365                     try {
366                         repDist = CPDIST[symbol];
367                         neededBits = CPDEXT[symbol];
368                     } catch (ArrayIndexOutOfBoundsException ex) {
369                         throw new DataFormatException("Illegal rep dist code");
370                     }
371                     /* fall through */
372                 case DECODE_HUFFMAN_DISTBITS :
373                     if (neededBits > 0) {
374                         mode = DECODE_HUFFMAN_DISTBITS;
375                         int i = input.peekBits(neededBits);
376                         if (i < 0)
377                             return false;
378                         input.dropBits(neededBits);
379                         repDist += i;
380                     }
381                     outputWindow.repeat(repLength, repDist);
382                     free -= repLength;
383                     mode = DECODE_HUFFMAN;
384                     break;
385                 default :
386                     throw new IllegalStateException();
387             }
388         }
389         return true;
390     }
391 
392     /***
393      * Decodes the adler checksum after the deflate stream.
394      * @return false if more input is needed. 
395      * @exception DataFormatException if checksum doesn't match.
396      */
397     private boolean decodeChksum() throws DataFormatException {
398         while (neededBits > 0) {
399             int chkByte = input.peekBits(8);
400             if (chkByte < 0)
401                 return false;
402             input.dropBits(8);
403             readAdler = (readAdler << 8) | chkByte;
404             neededBits -= 8;
405         }
406         if ((int) adler.getValue() != readAdler)
407             throw new DataFormatException(
408                 "Adler chksum doesn't match: "
409                     + Integer.toHexString((int) adler.getValue())
410                     + " vs. "
411                     + Integer.toHexString(readAdler));
412         mode = FINISHED;
413         return false;
414     }
415 
416     /***
417      * Decodes the deflated stream.
418      * @return false if more input is needed, or if finished. 
419      * @exception DataFormatException if deflated stream is invalid.
420      */
421     private boolean decode() throws DataFormatException {
422         switch (mode) {
423             case DECODE_HEADER :
424                 return decodeHeader();
425             case DECODE_DICT :
426                 return decodeDict();
427             case DECODE_CHKSUM :
428                 return decodeChksum();
429 
430             case DECODE_BLOCKS :
431                 if (isLastBlock) {
432                     if (nowrap) {
433                         mode = FINISHED;
434                         return false;
435                     } else {
436                         input.skipToByteBoundary();
437                         neededBits = 32;
438                         mode = DECODE_CHKSUM;
439                         return true;
440                     }
441                 }
442 
443                 int type = input.peekBits(3);
444                 if (type < 0)
445                     return false;
446                 input.dropBits(3);
447 
448                 if ((type & 1) != 0)
449                     isLastBlock = true;
450                 switch (type >> 1) {
451                     case DeflaterConstants.STORED_BLOCK :
452                         input.skipToByteBoundary();
453                         mode = DECODE_STORED_LEN1;
454                         break;
455                     case DeflaterConstants.STATIC_TREES :
456                         litlenTree = InflaterHuffmanTree.defLitLenTree;
457                         distTree = InflaterHuffmanTree.defDistTree;
458                         mode = DECODE_HUFFMAN;
459                         break;
460                     case DeflaterConstants.DYN_TREES :
461                         dynHeader = new InflaterDynHeader();
462                         mode = DECODE_DYN_HEADER;
463                         break;
464                     default :
465                         throw new DataFormatException(
466                             "Unknown block type " + type);
467                 }
468                 return true;
469 
470             case DECODE_STORED_LEN1 :
471                 {
472                     if ((uncomprLen = input.peekBits(16)) < 0)
473                         return false;
474                     input.dropBits(16);
475                     mode = DECODE_STORED_LEN2;
476                 }
477                 /* fall through */
478             case DECODE_STORED_LEN2 :
479                 {
480                     int nlen = input.peekBits(16);
481                     if (nlen < 0)
482                         return false;
483                     input.dropBits(16);
484                     if (nlen != (uncomprLen ^ 0xffff))
485                         throw new DataFormatException("broken uncompressed block");
486                     mode = DECODE_STORED;
487                 }
488                 /* fall through */
489             case DECODE_STORED :
490                 {
491                     int more = outputWindow.copyStored(input, uncomprLen);
492                     uncomprLen -= more;
493                     if (uncomprLen == 0) {
494                         mode = DECODE_BLOCKS;
495                         return true;
496                     }
497                     return !input.needsInput();
498                 }
499 
500             case DECODE_DYN_HEADER :
501                 if (!dynHeader.decode(input))
502                     return false;
503                 litlenTree = dynHeader.buildLitLenTree();
504                 distTree = dynHeader.buildDistTree();
505                 mode = DECODE_HUFFMAN;
506                 /* fall through */
507             case DECODE_HUFFMAN :
508             case DECODE_HUFFMAN_LENBITS :
509             case DECODE_HUFFMAN_DIST :
510             case DECODE_HUFFMAN_DISTBITS :
511                 return decodeHuffman();
512             case FINISHED :
513                 return false;
514             default :
515                 throw new IllegalStateException();
516         }
517     }
518 
519     /***
520      * Sets the preset dictionary.  This should only be called, if
521      * needsDictionary() returns true and it should set the same
522      * dictionary, that was used for deflating.  The getAdler()
523      * function returns the checksum of the dictionary needed.
524      * @param buffer the dictionary.
525      * @exception IllegalStateException if no dictionary is needed.
526      * @exception IllegalArgumentException if the dictionary checksum is
527      * wrong.  
528      */
529     public void setDictionary(byte[] buffer) {
530         setDictionary(buffer, 0, buffer.length);
531     }
532 
533     /***
534      * Sets the preset dictionary.  This should only be called, if
535      * needsDictionary() returns true and it should set the same
536      * dictionary, that was used for deflating.  The getAdler()
537      * function returns the checksum of the dictionary needed.
538      * @param buffer the dictionary.
539      * @param off the offset into buffer where the dictionary starts.
540      * @param len the length of the dictionary.
541      * @exception IllegalStateException if no dictionary is needed.
542      * @exception IllegalArgumentException if the dictionary checksum is
543      * wrong.  
544      * @exception IndexOutOfBoundsException if the off and/or len are wrong.
545      */
546     public void setDictionary(byte[] buffer, int off, int len) {
547         if (!needsDictionary())
548             throw new IllegalStateException();
549 
550         adler.update(buffer, off, len);
551         if ((int) adler.getValue() != readAdler)
552             throw new IllegalArgumentException("Wrong adler checksum");
553         adler.reset();
554         outputWindow.copyDict(buffer, off, len);
555         mode = DECODE_BLOCKS;
556     }
557 
558     /***
559      * Sets the input.  This should only be called, if needsInput()
560      * returns true.
561      * @param buf the input.
562      * @exception IllegalStateException if no input is needed.
563      */
564     public void setInput(byte[] buf) {
565         setInput(buf, 0, buf.length);
566     }
567 
568     /***
569      * Sets the input.  This should only be called, if needsInput()
570      * returns true.
571      * @param buf the input.
572      * @param off the offset into buffer where the input starts.
573      * @param len the length of the input.  
574      * @exception IllegalStateException if no input is needed.
575      * @exception IndexOutOfBoundsException if the off and/or len are wrong.
576      */
577     public void setInput(byte[] buf, int off, int len) {
578         input.setInput(buf, off, len);
579         totalIn += len;
580     }
581 
582     /***
583      * Inflates the compressed stream to the output buffer.  If this
584      * returns 0, you should check, whether needsDictionary(),
585      * needsInput() or finished() returns true, to determine why no 
586      * further output is produced.
587      * @param buf the output buffer.
588      * @return the number of bytes written to the buffer, 0 if no further
589      * output can be produced.  
590      * @exception DataFormatException if deflated stream is invalid.
591      * @exception IllegalArgumentException if buf has length 0.
592      */
593     public int inflate(byte[] buf) throws DataFormatException {
594         return inflate(buf, 0, buf.length);
595     }
596 
597     /***
598      * Inflates the compressed stream to the output buffer.  If this
599      * returns 0, you should check, whether needsDictionary(),
600      * needsInput() or finished() returns true, to determine why no 
601      * further output is produced.
602      * @param buf the output buffer.
603      * @param off the offset into buffer where the output should start.
604      * @param len the maximum length of the output.
605      * @return the number of bytes written to the buffer, 0 if no further
606      * output can be produced.  
607      * @exception DataFormatException if deflated stream is invalid.
608      * @exception IndexOutOfBoundsException if the off and/or len are wrong.
609      */
610     public int inflate(byte[] buf, int off, int len)
611         throws DataFormatException {
612         /* Special case: len may be zero */
613         if (len == 0)
614             return 0;
615         /* Check for correct buff, off, len triple */
616         if (0 > off || off > off + len || off + len > buf.length)
617             throw new ArrayIndexOutOfBoundsException();
618         int count = 0;
619         int more;
620         do {
621             if (mode != DECODE_CHKSUM) {
622                 /* Don't give away any output, if we are waiting for the
623                  * checksum in the input stream.
624                  *
625                  * With this trick we have always:
626                  *   needsInput() and not finished() 
627                  *   implies more output can be produced.  
628                  */
629                 more = outputWindow.copyOutput(buf, off, len);
630                 adler.update(buf, off, more);
631                 off += more;
632                 count += more;
633                 totalOut += more;
634                 len -= more;
635                 if (len == 0)
636                     return count;
637             }
638         } while (
639             decode()
640                 || (outputWindow.getAvailable() > 0 && mode != DECODE_CHKSUM));
641         return count;
642     }
643 
644     /***
645      * Returns true, if the input buffer is empty.
646      * You should then call setInput(). <br>
647      *
648      * <em>NOTE</em>: This method also returns true when the stream is finished.
649      */
650     public boolean needsInput() {
651         return input.needsInput();
652     }
653 
654     /***
655      * Returns true, if a preset dictionary is needed to inflate the input.
656      */
657     public boolean needsDictionary() {
658         return mode == DECODE_DICT && neededBits == 0;
659     }
660 
661     /***
662      * Returns true, if the inflater has finished.  This means, that no
663      * input is needed and no output can be produced.
664      */
665     public boolean finished() {
666         return mode == FINISHED && outputWindow.getAvailable() == 0;
667     }
668 
669     /***
670      * Gets the adler checksum.  This is either the checksum of all
671      * uncompressed bytes returned by inflate(), or if needsDictionary()
672      * returns true (and thus no output was yet produced) this is the
673      * adler checksum of the expected dictionary.
674      * @return the adler checksum.
675      */
676     public int getAdler() {
677         return needsDictionary() ? readAdler : (int) adler.getValue();
678     }
679 
680     /***
681      * Gets the total number of output bytes returned by inflate().
682      * @return the total number of output bytes.
683      */
684     public int getTotalOut() {
685         return totalOut;
686     }
687 
688     /***
689      * Gets the total number of processed compressed input bytes.
690      * @return the total number of bytes of processed input bytes.
691      */
692     public int getTotalIn() {
693         return totalIn - getRemaining();
694     }
695 
696     /***
697      * Gets the number of unprocessed input.  Useful, if the end of the
698      * stream is reached and you want to further process the bytes after
699      * the deflate stream.  
700      * @return the number of bytes of the input which were not processed.
701      */
702     public int getRemaining() {
703         return input.getAvailableBytes();
704     }
705 
706     /***
707      * Frees all objects allocated by the inflater.  There's no reason
708      * to call this, since you can just rely on garbage collection (even
709      * for the Sun implementation).  Exists only for compatibility
710      * with Sun's JDK, where the compressor allocates native memory.
711      * If you call any method (even reset) afterwards the behaviour is
712      * <i>undefined</i>.  
713      * @deprecated Just clear all references to inflater instead.
714      */
715     public void end() {
716         outputWindow = null;
717         input = null;
718         dynHeader = null;
719         litlenTree = null;
720         distTree = null;
721         adler = null;
722     }
723 
724     /***
725      * Finalizes this object.
726      */
727     protected void finalize() throws Throwable {
728         /* Exists only for compatibility */
729         super.finalize();
730     }
731 }